pacman::p_load(
  tidyverse,
  yardstick,
  tictoc,
  dtplyr,
  ggthemes,
  ggrepel,
  fs,
  fst,
  dtplyr,
  lubridate
)


if (LOOP != "combined") {
  
  # Folder holding the threshold inputs for this training window, cid sample
  # range and model metric.
  path_thresh <- paste0(
    "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/",
    "threshold_inputs_", TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST,
    "_", MODEL_METRIC, "/"
  )

  # Stack every file in that folder whose full path contains "thresh_",
  # then drop the column named `...1`.
  v_thresh_files <- str_subset(
    paste0(path_thresh, list.files(path_thresh)),
    "thresh_"
  )
  df_thresh_raw <- select(map_df(v_thresh_files, read_csv), -...1)
  glimpse(df_thresh_raw)

  # Location of the fitted test-set scores.
  file_name_fitted_merged <- paste0(
    "../../data/pipeline_outputs/", SPECIAL_SUFFIX,
    "/", "fitted_merged_", SUFFIX_FITTED_, "/",
    "test_fitted_CRA.fst"
  )
  
  # Load the fitted test-set scores into `df_raw`. When LLH_COND is set, keep
  # only the (cid, qtr) observations that are flagged in the open-accounts
  # files with open_acc_has_succesive_2qtr_within_8qtr == 1.
  if (LLH_COND == TRUE) {

    df_raw_temp <- read_fst(file_name_fitted_merged)

    # Regex alternation of the quarters present in the scores (dashes removed),
    # used to pick the matching open-accounts files by name.
    re_quarter <- unique(df_raw_temp$qtr) %>%
      str_remove_all("-") %>%
      paste0(collapse = "|")

    # "<smallest>_<largest>" -> numeric bounds of the cid sample range.
    v_rand_no_cid_min_max <- RAND_NO_CID_SMALLEST_LARGEST %>%
      str_split("_") %>%
      map(~as.numeric(.))

    # Regex alternation of every zero-padded (width 4) sample number in range.
    re_rand_no_cid <- seq(v_rand_no_cid_min_max[[1]][1], v_rand_no_cid_min_max[[1]][2]) %>%
      str_pad(4, "left", "0") %>%
      paste0(collapse = "|")

    path_open_accounts <- "../../data/open_accounts/"

    tic()
    df_acc <- dir_ls(path_open_accounts) %>%
      str_subset(re_quarter) %>%
      str_subset(re_rand_no_cid) %>%
      map_df(~read_fst(., columns = c("cid", "qtr", "open_acc_has_succesive_2qtr_within_8qtr"))) %>%
      filter(open_acc_has_succesive_2qtr_within_8qtr == 1) %>%
      # The join below is a pure filter: keep each qualifying (cid, qtr) key
      # once so repeated rows in the account files cannot duplicate scores.
      distinct(cid, qtr)
    toc()

    tic()
    df_raw <- df_raw_temp %>%
      inner_join(df_acc, by = c("cid", "qtr"))
    toc()

  } else {

    tic()
    df_raw <- read_fst(file_name_fitted_merged)
    toc()

  }
  
  
  
  # Structural preview of the scored data.
  glimpse(df_raw)

  # ROC curve of the logistic score against t_default (second factor level is
  # treated as the event).
  df_t <- yardstick::roc_curve(df_raw, t_default, logistic, event_level = "second")
  glimpse(df_t)

  # Modelling table: complete cases only, outcome flipped to "non-default",
  # model scores negated, and Income_Level collapsed to a 0/1 dummy
  # (levels 1-2 -> 0, levels 3-4 -> 1, anything else dropped).
  df_temp <- df_raw %>%
    filter(!is.na(xgb), !is.na(logistic), !is.na(riskscore)) %>%
    mutate(
      t_non_default = factor(
        1 - as.numeric(as.character(t_default)),
        levels = c(0, 1)
      ),
      value_riskscore = riskscore,
      value_xgb = -1 * xgb,
      value_logistic = -1 * logistic,
      d_Income_Level = case_when(
        Income_Level %in% c(1, 2) ~ 0,
        Income_Level %in% c(3, 4) ~ 1,
        TRUE ~ NA_real_
      )
    ) %>%
    filter(!is.na(d_Income_Level)) %>%
    select(-Income_Level)
  glimpse(df_temp)
  
  # Percentile break points for every score type.
  file_name_pct <- paste0(path_thresh, "percentiles.csv")
  df_pct_raw <- rename(read_csv(file_name_pct), value_type = value_type_)

  v_value_type <- c("riskscore", "logistic", "xgb")

  # Reshape the raw threshold files to one row per model / ratio / group with
  # one column per threshold type, and interpolate the medium and weak
  # thresholds between the strong (tpr) and the single threshold.
  # NOTE: this table is only printed here; `df_thresh` is assigned again
  # further down in this script before it is joined to the scores.
  df_thresh <- df_thresh_raw %>%
    filter(loss_profit_ratio %in% seq(1, 11, by = 2)) %>%
    pivot_longer(
      cols = c(value_majority, value_minority),
      names_to = "group",
      values_to = "thresh"
    ) %>%
    mutate(
      thresh_type = paste0("thresh_", thresh_type),
      value_type = str_remove(value_type, "value_"),
      group = str_to_title(str_remove(group, "value_"))
    ) %>%
    pivot_wider(names_from = thresh_type, values_from = thresh) %>%
    rename(thresh_tpr_strong = thresh_tpr, model_type = value_type) %>%
    mutate(
      thresh_tpr_med = thresh_tpr_strong + (thresh_single - thresh_tpr_strong) * 0.33,
      thresh_tpr_weak = thresh_tpr_strong + (thresh_single - thresh_tpr_strong) * 0.66,
      d_Income_Level = case_when(
        group == "Majority" ~ 1,
        group == "Minority" ~ 0
      )
    )
  print(df_thresh)
  
  
  
  ########################################################
  
  # Convert each model's raw score into its percentile label.
  #
  # For every model the break points come from percentiles.csv, with the
  # outermost breaks widened so that all scores fall inside the range. Each
  # interval is labelled with the percentile of its upper break.
  #
  # cut() spells the argument `include.lowest`; the former `include_lowest`
  # was silently absorbed by `...`, so scores equal to the lowest break came
  # back as NA instead of landing in the first bin.

  # Riskscore ---------------------------------------------------------------

  df_pct <- df_pct_raw %>%
    filter(value_type == "riskscore") %>%
    distinct() %>%  # same de-duplication as the xgb / logistic tables below
    glimpse()

  v_pct <- df_pct$value
  v_pct[1] <- 0
  v_pct[length(v_pct)] <- 9999

  df_rs <- df_temp %>%
    rename(value = value_riskscore) %>%
    mutate(
      value = as.numeric(as.character(
        cut(value, breaks = v_pct, labels = df_pct$pct[-1], include.lowest = TRUE)
      )),
      model_type = "riskscore"
    ) %>%
    select(cid, qtr, model_type, d_Income_Level, t_non_default, value) %>%
    glimpse()

  # XGBoost -----------------------------------------------------------------

  df_pct <- df_pct_raw %>%
    filter(value_type == "xgb") %>%
    distinct() %>%
    glimpse()

  v_pct <- df_pct$value
  v_pct[1] <- -9999
  v_pct[length(v_pct)] <- 9999

  df_xgb <- df_temp %>%
    rename(value = value_xgb) %>%
    mutate(
      value = as.numeric(as.character(
        cut(value, breaks = v_pct, labels = df_pct$pct[-1], include.lowest = TRUE)
      )),
      model_type = "xgb"
    ) %>%
    select(cid, qtr, model_type, d_Income_Level, t_non_default, value) %>%
    glimpse()

  # Logistic ----------------------------------------------------------------

  df_pct <- df_pct_raw %>%
    filter(value_type == "logistic") %>%
    distinct() %>%
    glimpse()

  v_pct <- df_pct$value
  v_pct[1] <- -9999
  v_pct[length(v_pct)] <- 9999

  df_logistic <- df_temp %>%
    rename(value = value_logistic) %>%
    mutate(
      value = as.numeric(as.character(
        cut(value, breaks = v_pct, labels = df_pct$pct[-1], include.lowest = TRUE)
      )),
      model_type = "logistic"
    ) %>%
    select(cid, qtr, model_type, d_Income_Level, t_non_default, value) %>%
    glimpse()

  # Long table: one row per observation and model.
  df <- df_rs %>%
    bind_rows(df_xgb) %>%
    bind_rows(df_logistic) %>%
    glimpse()

  # Group-blind optimum: for each model and loss/profit ratio, the percentile
  # cut-off with the highest pooled profit (tp - ratio * fp), where tp / fp
  # count the observations strictly above the cut-off.
  df_optimal_single_thresh <- df %>%
    group_by(model_type, value, t_non_default) %>%
    summarize(n = n(), .groups = "drop") %>%
    pivot_wider(
      names_from = t_non_default,
      names_prefix = "true_",
      values_from = n,
      values_fill = 0
    ) %>%
    arrange(model_type, value) %>%
    group_by(model_type) %>%
    mutate(
      # Running totals up to and including a cut-off are the rejected cases.
      tn = cumsum(true_0),
      fn = cumsum(true_1),
      fp = sum(true_0) - tn,
      tp = sum(true_1) - fn,
      profit_1 = tp - 1 * fp,
      profit_3 = tp - 3 * fp,
      profit_5 = tp - 5 * fp,
      profit_7 = tp - 7 * fp,
      profit_9 = tp - 9 * fp,
      profit_11 = tp - 11 * fp
    ) %>%
    ungroup() %>%
    pivot_longer(
      starts_with("profit_"),
      names_to = "loss_profit_ratio",
      values_to = "profit"
    ) %>%
    mutate(
      loss_profit_ratio = as.numeric(str_remove(loss_profit_ratio, "profit_"))
    ) %>%
    # Best profit first; equal profits resolve to the smallest cut-off.
    arrange(model_type, loss_profit_ratio, desc(profit), value) %>%
    group_by(model_type, loss_profit_ratio) %>%
    filter(row_number() == 1) %>%
    ungroup() %>%
    select(model_type, loss_profit_ratio, optimal_single_thresh = value) %>%
    glimpse()

  
  # Confusion-matrix counts, TPR / FPR and profit for every candidate cut-off,
  # computed separately per model and income group.
  #
  # A cut-off approves observations with value strictly above it (the same
  # rule used for the pooled optimum and for `value > thresh` when decisions
  # are made). tp / fp were previously taken from the reverse cumulative sums,
  # which include the cut-off bin itself, while fn / tn also include it, so
  # that bin was counted twice and tp + fn exceeded the number of positives.
  df_values_by_group <- df %>%
    #filter(model_type == "xgb") %>%
    group_by(model_type, value, t_non_default, d_Income_Level) %>%
    summarize(
      n = n()
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = t_non_default, names_prefix = "true_", values_from = n, values_fill = 0) %>%
    arrange(model_type, d_Income_Level, value) %>%
    group_by(model_type, d_Income_Level) %>%
    mutate(
      true_0_cum = cumsum(true_0),
      true_1_cum = cumsum(true_1)
    ) %>%
    arrange(model_type, d_Income_Level, -value) %>%
    mutate(
      # Kept for inspection only: counts at or above each cut-off.
      true_0_cum_rev = cumsum(true_0),
      true_1_cum_rev = cumsum(true_1)
    ) %>%
    arrange(model_type, d_Income_Level, value) %>%
    mutate(
      true_0_total = sum(true_0),
      true_1_total = sum(true_1),
      num_obs_total = true_0_total + true_1_total,
      fp = true_0_total - true_0_cum,
      fn = true_1_cum,
      tp = true_1_total - true_1_cum,
      tn = true_0_cum,
      tpr = tp / (tp + fn),
      fpr = fp / (tn + fp),
      profit_1 = tp - 1 * fp,
      profit_3 = tp - 3 * fp,
      profit_5 = tp - 5 * fp,
      profit_7 = tp - 7 * fp,
      profit_9 = tp - 9 * fp,
      profit_11 = tp - 11 * fp
    ) %>%
    ungroup() %>%
    pivot_longer(starts_with("profit_"), names_to = "loss_profit_ratio", values_to = "profit") %>%
    mutate(
      loss_profit_ratio = as.numeric(str_remove(loss_profit_ratio, "profit_"))
    ) %>%
    glimpse()
  
  # TPR of the d_Income_Level == 1 group at the group-blind optimum; this is
  # the target the other group's "strong" threshold has to match.
  df_benchmark_tpr_for_strong <- df_values_by_group %>%
    inner_join(df_optimal_single_thresh, by = c("model_type", "loss_profit_ratio")) %>%
    filter(value == optimal_single_thresh, d_Income_Level == 1) %>%
    ungroup() %>%
    select(model_type, loss_profit_ratio, target_tpr_for_strong_thresh = tpr) %>%
    glimpse()

  # For the d_Income_Level == 0 group, pick per model and ratio the cut-off
  # whose TPR is closest to that target ("strong"); "medium" and "weak" move
  # 33% / 66% of the way back towards the group-blind optimum.
  df_thresh_for_lmi <- df_values_by_group %>%
    filter(d_Income_Level == 0) %>%
    inner_join(df_benchmark_tpr_for_strong, by = c("model_type", "loss_profit_ratio")) %>%
    inner_join(df_optimal_single_thresh, by = c("model_type", "loss_profit_ratio")) %>%
    mutate(
      tpr_diff = target_tpr_for_strong_thresh - tpr,
      tpr_diff_abs = abs(tpr_diff),
      thresh_lmi_strong = value,
      thresh_lmi_medium = round(value + 0.33 * (optimal_single_thresh - value)),
      thresh_lmi_weak = round(value + 0.66 * (optimal_single_thresh - value))
    ) %>%
    arrange(model_type, loss_profit_ratio, tpr_diff_abs) %>%
    group_by(model_type, loss_profit_ratio) %>%
    filter(row_number() == 1) %>%
    ungroup() %>%
    glimpse()
  
  # Thresholds for the d_Income_Level == 0 group: the blind threshold plus the
  # three group-specific variants found above.
  df_thresh_lmi <- df_thresh_for_lmi %>%
    transmute(
      model_type,
      loss_profit_ratio,
      d_Income_Level,
      thresh_single = optimal_single_thresh,
      thresh_tpr_strong = thresh_lmi_strong,
      thresh_tpr_med = thresh_lmi_medium,
      thresh_tpr_weak = thresh_lmi_weak
    ) %>%
    glimpse()

  # The d_Income_Level == 1 group uses the blind threshold under every
  # constraint.
  df_thresh_nonlmi <- df_thresh_for_lmi %>%
    transmute(
      model_type,
      loss_profit_ratio,
      thresh_single = optimal_single_thresh,
      thresh_tpr_strong = optimal_single_thresh,
      thresh_tpr_med = optimal_single_thresh,
      thresh_tpr_weak = optimal_single_thresh,
      d_Income_Level = 1
    ) %>%
    glimpse()

  # Final threshold table for both groups.
  df_thresh <- bind_rows(df_thresh_lmi, df_thresh_nonlmi)
  glimpse(df_thresh)
  
  # Reshape the single and strong thresholds into the layout written to
  # df_for_tpr_fpr_plots.csv: one row per value_type / ratio / thresh_type
  # with a value_minority and a value_majority column.
  v_thresh_type_label <- c(thresh_single = "single", thresh_tpr_strong = "tpr")

  df_thresh_for_tpr_fpr_plots <- df_thresh %>%
    select(-c(thresh_tpr_med, thresh_tpr_weak)) %>%
    mutate(
      value_type = paste0("value_", model_type),
      d_Income_Level = if_else(d_Income_Level == 0, "value_minority", "value_majority")
    ) %>%
    pivot_longer(cols = starts_with("thresh_"), names_to = "thresh_type") %>%
    pivot_wider(names_from = "d_Income_Level", values_from = "value") %>%
    mutate(
      # Named lookup; any other thresh_type becomes NA.
      thresh_type = unname(v_thresh_type_label[thresh_type])
    ) %>%
    distinct() %>%
    select(-model_type)
  glimpse(df_thresh_for_tpr_fpr_plots)

  print(filter(df_thresh_for_tpr_fpr_plots, loss_profit_ratio == 1))

  path_df_thresh_for_tpr_fpr_plots <- paste0(
    "../../data/pipeline_outputs/",
    SPECIAL_SUFFIX, "/threshold_inputs_",
    TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST,
    "_", MODEL_METRIC, "/"
  )

  write_csv(
    df_thresh_for_tpr_fpr_plots,
    paste0(path_df_thresh_for_tpr_fpr_plots, "df_for_tpr_fpr_plots.csv")
  )
  
    

  # Apply every threshold variant to every scored observation: one row per
  # observation x model x loss/profit ratio x threshold type, predicted
  # non-default ("1") when the percentile value is strictly above the
  # threshold. Runs through dtplyr and is collected back to a tibble.
  tic()
  df_decisions <- lazy_dt(df) %>%
    inner_join(df_thresh, by = c("model_type", "d_Income_Level")) %>%
    pivot_longer(
      cols = starts_with("thresh_"),
      names_to = "thresh_type",
      values_to = "thresh"
    ) %>%
    mutate(t_non_default_pred = as.character(as.numeric(value > thresh))) %>%
    as_tibble()
  glimpse(df_decisions)
  toc()

  glimpse(df_decisions)

  # Persist the decisions so the "combined" run can reload them.
  write_fst(
    df_decisions,
    paste0(
      "../../data/pipeline_outputs/", SPECIAL_SUFFIX,
      "/threshold_inputs_", TRAINING_SUFFIX, "_",
      RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC,
      "/df_decisions_temp.fst"
    )
  )
  # Profit--Fairness Tradeoff -----------------------------------------------
  
  # Confusion-matrix counts per model, threshold type, loss/profit ratio and
  # income group (n = actual defaults, p = actual non-defaults).
  df_metrics_temp <- df_decisions %>%
    group_by(model_type, thresh_type, loss_profit_ratio, d_Income_Level) %>%
    summarize(
      n = sum(t_non_default == "0"),
      p = sum(t_non_default == "1"),
      tp = sum(t_non_default == "1" & t_non_default_pred == "1"),
      fp = sum(t_non_default == "0" & t_non_default_pred == "1"),
      tn = sum(t_non_default == "0" & t_non_default_pred == "0"),
      fn = sum(t_non_default == "1" & t_non_default_pred == "0"),
      .groups = "drop"
    )

  # Persist the counts so the "combined" run can sum them across windows.
  write_fst(
    df_metrics_temp,
    paste0(
      "../../data/pipeline_outputs/", SPECIAL_SUFFIX,
      "/threshold_inputs_", TRAINING_SUFFIX, "_",
      RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC,
      "/df_metrics_temp.fst"
    )
  )
  
} else {
  
  # Directory suffix of every training window:
  # "Train<startYear>Q<q><endYear>Q<q>_<cid range>_<metric>".
  # Pre-allocated as character (vector(length =) would create a logical
  # vector), and iterated with seq_len() so NUM_MODELS == 0 runs zero times
  # instead of visiting indices 1 and 0.
  V_TRAINING_SUFFIX <- character(NUM_MODELS)

  for (i in seq_len(NUM_MODELS)) {

    TRAINING_START_QTR <- V_TRAINING_START_QTR[i]
    TRAINING_END_QTR <- V_TRAINING_END_QTR[i]

    TRAINING_SUFFIX <- paste0("Train",
                              year(ymd(TRAINING_START_QTR)),
                              "Q", quarter(ymd(TRAINING_START_QTR)),
                              year(ymd(TRAINING_END_QTR)),
                              "Q",
                              quarter(ymd(TRAINING_END_QTR)))

    V_TRAINING_SUFFIX[i] <- paste0(TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC)

  }
  
  
  # Read the confusion-matrix counts saved for one training window.
  #
  # training_suffix: suffix of that window's threshold_inputs_ directory.
  # Returns the contents of the window's df_metrics_temp.fst.
  Load_Metrics_Temp <- function(training_suffix) {

    read_fst(paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX,
                    "/threshold_inputs_", training_suffix, "/df_metrics_temp.fst"))

  }

  # Sum the counts over all training windows. Groups are dropped so the result
  # is an ungrouped table like the one built in the single-window branch; the
  # code after this branch mutates `thresh_type`, which would otherwise still
  # be a grouping column.
  df_metrics_temp <- map_dfr(V_TRAINING_SUFFIX, Load_Metrics_Temp) %>%
    group_by(model_type, thresh_type, loss_profit_ratio, d_Income_Level) %>%
    summarize(
      across(n:fn, ~ sum(.)),
      .groups = "drop"
    )

  write_fst(df_metrics_temp, paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX,
                                    "/threshold_inputs_combined/df_metrics_temp_combined.fst"))
  
}

# Profit / fairness trade-off table: per model, threshold type and loss/profit
# ratio, the lending outcomes of each income group side by side plus the
# between-group TPR / FPR gaps and the total profit.
df_metrics <- df_metrics_temp %>%
  mutate(
    group = case_when(
      d_Income_Level == 1 ~ "majority",
      d_Income_Level == 0 ~ "minority",
      TRUE ~ NA_character_
    ),
    loans_given = tp + fp,
    pct_repay = tp / loans_given,
    pct_default = fp / loans_given,
    profit = tp - loss_profit_ratio * fp,
    tpr = tp / p,
    fpr = fp / n,
    thresh_type = str_remove(thresh_type, "thresh_")
  ) %>%
  select(
    model_type, thresh_type, loss_profit_ratio, group,
    loans_given, pct_repay, pct_default, profit, tpr, fpr
  ) %>%
  pivot_wider(
    names_from = group,
    values_from = c(loans_given, pct_repay, pct_default, profit, tpr, fpr)
  ) %>%
  mutate(
    tpr_diff = tpr_minority - tpr_majority,
    fpr_diff = fpr_minority - fpr_majority,
    ave_odds_diff = (tpr_diff + fpr_diff) / 2,
    profit = profit_minority + profit_majority
  ) %>%
  arrange(loss_profit_ratio, desc(tpr_diff)) %>%
  glimpse()



# Output folder for the plot data: a shared folder for the combined run,
# otherwise the folder of the current fitted model.
path_plots <- if (LOOP == "combined") {
  str_glue("../../data/pipeline_outputs/{SPECIAL_SUFFIX}/plots_combined/")
} else {
  str_glue("../../data/pipeline_outputs/{SPECIAL_SUFFIX}/plots_{SUFFIX_FITTED_}/")
}

file_name_out <- paste0(path_plots, "data_tradeoff.fst")
write_fst(df_metrics, file_name_out)

# Winners and losers ------------------------------------------------------
# 

if (LOOP != "combined") {
  
  # Share of tp / fp / tn / fn within each model x group x threshold type x
  # ratio cell, labelled by fairness constraint, with the change in
  # percentage points relative to the row four positions earlier.
  tic()
  df_metrics <- df_decisions %>%
    group_by(model_type, thresh_type, loss_profit_ratio, d_Income_Level) %>%
    summarize(
      n = sum(t_non_default == "0"),
      p = sum(t_non_default == "1"),
      tp = sum(t_non_default == "1" & t_non_default_pred == "1"),
      fp = sum(t_non_default == "0" & t_non_default_pred == "1"),
      tn = sum(t_non_default == "0" & t_non_default_pred == "0"),
      fn = sum(t_non_default == "1" & t_non_default_pred == "0"),
      .groups = "drop"
    ) %>%
    pivot_longer(cols = c(tp, fp, tn, fn), names_to = "metric") %>%
    group_by(model_type, d_Income_Level, thresh_type, loss_profit_ratio) %>%
    mutate(value = value / sum(value)) %>%
    ungroup() %>%
    mutate(
      thresh_type = str_remove(thresh_type, "thresh_"),
      d_Income_Level = str_to_lower(d_Income_Level),
      # Threshold types outside this list end up as NA.
      constraint = factor(
        thresh_type,
        levels = c("tpr_strong", "tpr_med", "tpr_weak", "single", "min_cost"),
        labels = c("Strong", "Medium", "Weak", "Blind", "Max Profit")
      )
    ) %>%
    arrange(loss_profit_ratio, model_type, d_Income_Level, metric, desc(constraint)) %>%
    group_by(loss_profit_ratio, model_type, d_Income_Level, metric) %>%
    mutate(
      # NOTE(review): lag(value, 4) is NA for the first four rows of a group,
      # so it needs five constraint rows per group. The thresh_* columns built
      # earlier in this script give four (single, tpr_strong, tpr_med,
      # tpr_weak); confirm label_diff is not NA everywhere.
      label_diff = round((value - lag(value, 4)) * 100, 2),
      label_diff = ifelse(
        label_diff > 0,
        paste0("+", label_diff, "pp"),
        paste0(label_diff, "pp")
      )
    ) %>%
    ungroup() %>%
    glimpse()
  toc()

  file_name_out <- paste0(path_plots, "data_win_lose.fst")

  write_fst(df_metrics, file_name_out)
  
} else{
  
  
  # Directory suffix of every training window:
  # "Train<startYear>Q<q><endYear>Q<q>_<cid range>_<metric>".
  # Pre-allocated as character (vector(length =) would create a logical
  # vector), and iterated with seq_len() so NUM_MODELS == 0 runs zero times
  # instead of visiting indices 1 and 0.
  V_TRAINING_SUFFIX <- character(NUM_MODELS)

  for (i in seq_len(NUM_MODELS)) {

    TRAINING_START_QTR <- V_TRAINING_START_QTR[i]
    TRAINING_END_QTR <- V_TRAINING_END_QTR[i]

    TRAINING_SUFFIX <- paste0("Train",
                              year(ymd(TRAINING_START_QTR)),
                              "Q", quarter(ymd(TRAINING_START_QTR)),
                              year(ymd(TRAINING_END_QTR)),
                              "Q",
                              quarter(ymd(TRAINING_END_QTR)))

    V_TRAINING_SUFFIX[i] <- paste0(TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC)

  }
  
  
  # Read the per-observation decisions saved for one training window.
  #
  # training_suffix: suffix of that window's threshold_inputs_ directory.
  # Returns the contents of the window's df_decisions_temp.fst.
  Load_Decisions_Temp <- function(training_suffix) {

    read_fst(paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX,
                    "/threshold_inputs_", training_suffix,
                    "/df_decisions_temp.fst"))
  }

  df_decisions_all <- map_dfr(V_TRAINING_SUFFIX, Load_Decisions_Temp)

  # Share of tp / fp / tn / fn within each model x group x threshold type x
  # ratio cell over all windows, labelled by fairness constraint.
  # tic() pairs with the toc() below; this branch previously called toc()
  # without starting a timer.
  tic()
  df_metrics <- df_decisions_all %>%
    #filter(model_type == "xgb") %>%
    group_by(model_type, thresh_type, loss_profit_ratio, d_Income_Level) %>%
    summarize(
      n = sum(t_non_default == "0"),
      p = sum(t_non_default == "1"),
      tp = sum((t_non_default == "1") & (t_non_default_pred == "1")),
      fp = sum((t_non_default == "0") & (t_non_default_pred == "1")),
      tn = sum((t_non_default == "0") & (t_non_default_pred == "0")),
      fn = sum((t_non_default == "1") & (t_non_default_pred == "0"))
    ) %>%
    pivot_longer(cols = c(tp, fp, tn, fn), names_to = "metric") %>%
    group_by(model_type, d_Income_Level, thresh_type, loss_profit_ratio) %>%
    mutate(
      value = value / sum(value)
    ) %>%
    ungroup() %>%
    mutate(
      #model_type = "Riskscore",
      thresh_type = str_remove(thresh_type, "thresh_"),
      d_Income_Level = str_to_lower(d_Income_Level),
      constraint = case_when(
        thresh_type == "min_cost" ~ "Max Profit",
        thresh_type == "single" ~ "Blind",
        thresh_type == "tpr_weak" ~ "Weak",
        thresh_type == "tpr_med" ~ "Medium",
        thresh_type == "tpr_strong" ~ "Strong"
      ),
      constraint = factor(constraint, levels = c("Strong", "Medium", "Weak", "Blind", "Max Profit"))
    ) %>%
    arrange(loss_profit_ratio, model_type, d_Income_Level, metric, desc(constraint)) %>%
    group_by(loss_profit_ratio, model_type, d_Income_Level, metric) %>%
    mutate(
      # NOTE(review): lag(value, 4) is NA for the first four rows of a group,
      # so it needs five constraint rows per group; confirm the saved
      # decisions contain that many threshold types.
      label_diff = value - lag(value, 4),
      label_diff = round(label_diff * 100, 2),
      label_diff = ifelse(label_diff > 0, paste0("+", label_diff, "pp"), paste0(label_diff, "pp"))
    ) %>%
    ungroup() %>%
    glimpse()
  toc()

  file_name_out <- paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/plots_combined/data_win_lose.fst")

  write_fst(df_metrics, file_name_out)
  
  
}
  
  



